
** Set Workspace **
global dir=""
cd "$dir"

*Install packages and schemes:
net install gr0002_3, from(http://www.stata-journal.com/software/sj4-3)
ssc install grqreg

use "$dir/ehpm_incomemodule_wreform.dta", clear 
capture drop ln_hh_inc_pc hh_inc_pc_real ln_hh_inc_pc_real 
gen ln_hh_inc_pc = log(hh_income_pc)

**Original real income measure
gen hh_inc_pc_real = (year==2000)*hh_income_pc*71.57/100 + ///
                 (year==2001)*hh_income_pc*74.25/100 + (year==2004)*hh_income_pc*80.68/100 + ///
                 (year==2005)*hh_income_pc*84.47/100 + (year==2006)*hh_income_pc*87.88/100 + ///
                 (year==2007)*hh_income_pc*91.90/100 + (year==2008)*hh_income_pc*98.06/100 + ///
                 (year==2009)*hh_income_pc*99.10/100 + (year==2011)*hh_income_pc*105.13/100 + ///
                 (year==2012)*hh_income_pc*106.95/100 + (year==2013)*hh_income_pc*107.79/100 
gen ln_hh_inc_pc_real = log(hh_inc_pc_real )

*Corrected real income measure
gen hh_inc_pc_realC = (year==2000)*hh_income_pc*71.57/100 + ///
                 (year==2001)*hh_income_pc*74.25/100 + (year==2004)*hh_income_pc*80.68/100 + ///
                 (year==2005)*hh_income_pc*84.47/100 + (year==2006)*hh_income_pc*87.88/100 + ///
                 (year==2007)*hh_income_pc*91.90/100 + (year==2008)*hh_income_pc*98.06/100 + ///
                 (year==2009)*hh_income_pc*99.10/100 + (year==2011)*hh_income_pc*105.13/100 + ///
                 (year==2012)*hh_income_pc*106.95/100 + (year==2013)*hh_income_pc*107.79/100  + ///
				 (year==2010)*hh_income_pc
gen ln_hh_inc_pc_realC = log(hh_inc_pc_realC )


****************************************
*** Replicating original results
****************************************

*****Cols 1 and 2 Table 5 of original paper, Cols 1 and 2 of Table 3 in comment
*Income
preserve
eststo T3c1: reg hh_inc_pc_real Above500 norm_dist c.norm_dist#Above500 i_year* sex  age age2 if abs(norm_dist) <  300 , cluster(Expropretario_ISTA)
	tab Above500 if e(sample)
eststo T3c2: reg hh_inc_pc_real Above500 norm_dist c.norm_dist#Above500 i_year*  sex  age age2 if abs(norm_dist) < 150 , cluster(Expropretario_ISTA)
	tab Above500 if e(sample)
restore	

*****Cols 3 and 4 Table 5 of original paper, Cols 1 and 2 of Table 1 in comment
*IQR
preserve
collapse (iqr) hh_income_pc hh_inc_pc_real (mean) num_members norm_dist Above500, by(match_id Expropretario_ISTA i_year*) cw
eststo T1c1:  reg hh_inc_pc_real Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 300 , cluster(Expropretario_ISTA)
	tab Above500 if e(sample)
eststo T1c2: reg hh_inc_pc_real Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 150 , cluster(Expropretario_ISTA)
	tab Above500 if e(sample)
restore

*****Figure 6 in orginal paper, Figure 1 left panel in comment
preserve
gen Above500_QPlot = Above500
label var Above500_QPlot "Quantile Estimates for: Above 500 (ha)"
drop if num_members < 5
local bwidth =150
bsqreg ln_hh_inc_pc_real  Above500_QPlot norm_dist norm_dist_Above  i_year1-i_year8 i_year10-i_year11  if abs(norm_dist) < 150 & hh_income_pc >0,  q(.50) 
set scheme lean1
grqreg Above500_QPlot,  ci reps(40) qstep(.2) seed(821)   subtitle("Replication figure")
restore


****************************************
*** Updated results
****************************************

*Table 1, Cols 5 and 6: Sample restriction to original estimation sample
preserve
egen group=group(match_id Expropretario_ISTA year)
bys group: gen groupN=_N
collapse (iqr) hh_inc_pc_real (mean)  groupN  norm_dist Above500, by(match_id Expropretario_ISTA i_year*) cw
eststo T1c5: reg hh_inc_pc_real Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 300 &  groupN>4, cluster(Expropretario_ISTA)
tab Above500 if e(sample)
eststo T1c6: reg hh_inc_pc_real Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 150&  groupN>4 , cluster(Expropretario_ISTA)
tab Above500 if e(sample)
restore


*** Remove duplicate observations

*Duplicate by "identifying variables" (add norm_dist because a few individuals that are the same are distributed to different properties, removed below)
duplicates drop dept  munic canton year hh_income hh_income_pc Landholding_Type Total_Workers age norm_dist,force

*These are checked against the rawdata. Identified by manually checking all individuals from households with more than one observation
*Remove those that do not exist in the raw data
drop if  munic==3 & canton==17 & hh_income_pc==32 & age==48 & year==2008
drop if  munic==3 & canton==17 & hh_income_pc==38.46 & age==29 & year==2008
drop if  munic==3 & canton==6 & hh_income_pc==21.39 & age==53 & year==2010
drop if  munic==3 & canton==6 & hh_income_pc==28.35 & age==24& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==47.38 & age==66& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==47.38 & age==25& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==32.39 & age==66& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==32.39 & age==22& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==32.39 & age==25& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==20.75 & age==66& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==20.75 & age==22& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==39.83 & age==25& year==2010
drop if  munic==16 & canton==1 & hh_income_pc==39.83 & age==22& year==2010
drop if  munic==6 & canton==7 & hh_income_pc==25.97 & age==23& year==2011
drop if  munic==6 & canton==7 & hh_income_pc==25.97 & age==35& year==2011
drop if  munic==6 & canton==7 & hh_income_pc==140.31 & age==50& year==2011
drop if  munic==6 & canton==7 & hh_income_pc==140.31 & age==23& year==2011
drop if  munic==6 & canton==7 & hh_income_pc==142.53 & age==50& year==2011
drop if  munic==6 & canton==7 & hh_income_pc==142.53 & age==35& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==21.83 & age==32& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==21.83 & age==29& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==21.83 & age==34& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==21.83 & age==36& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==22.25 & age==24& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==22.25 & age==34& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==22.25 & age==29& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==22.25 & age==36& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==43.88 & age==32& year==2011
drop if  munic==7 & canton==1 & hh_income_pc==43.88 & age==24& year==2011
drop if  munic==6 & canton==8 & hh_income_pc==24.75 & age==32 & year==2011
drop if  munic==6 & canton==8 & hh_income_pc==21.12 & age==62 & year==2011
drop if  munic==8 & canton==0 & hh_income_pc==139.83 & age==57 & year==2011
drop if  munic==8 & canton==0 & hh_income_pc==55.5 & age==50 & year==2011
drop if  munic==8 & canton==0 & hh_income_pc==139.83 & age==57& year==2012
drop if  munic==8 & canton==0 & hh_income_pc==55.5 & age==50& year==2012
drop if  munic==7 & canton==10 & hh_income_pc==63.19 & age==22& year==2013
drop if  munic==7 & canton==10 & hh_income_pc==82.36 & age==56& year==2013
drop if  munic==5 & canton==10 & hh_income_pc==25 & age==49& year==2013
drop if  munic==5 & canton==10 & hh_income_pc==25 & age==26& year==2013
drop if  munic==5 & canton==10 & hh_income_pc==11.42 & age==26& year==2013
drop if  munic==5 & canton==10 & hh_income_pc==66.67 & age==49& year==2013
drop if  munic==7 & canton==11 & hh_income_pc==104 & age==32& year==2013
drop if  munic==7 & canton==11 & hh_income_pc==61.92 & age==65& year==2013

*OBS: 6 individuals are allocated to TWO properties. Remove these to assure unique estimates
bysort dept  munic canton year hh_income hh_income_pc Landholding_Type Total_Workers age: gen dupl_HH=_N
drop if dupl_HH>1


*Table 1, Cols 3 and 4: "Corrected data"
preserve
collapse (iqr) hh_inc_pc_realC (mean)  norm_dist Above500, by(match_id Expropretario_ISTA i_year*) cw
eststo T1c3:  reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 300 , cluster(Expropretario_ISTA)
tab Above500 if e(sample)
eststo T1c4:  reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 150 , cluster(Expropretario_ISTA)
tab Above500 if e(sample)
restore


*Table 1, Cols 7 and 8:  "Corrected data and sample restriction"
preserve
egen group=group(match_id Expropretario_ISTA year)
bys group: gen groupN=_N
collapse (iqr) hh_inc_pc_realC (mean)  groupN  norm_dist Above500, by(match_id Expropretario_ISTA i_year*) cw
eststo T1c7: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 300 &  groupN>4, cluster(Expropretario_ISTA)
tab Above500 if e(sample)
eststo T1c8: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 150&  groupN>4 , cluster(Expropretario_ISTA)
tab Above500 if e(sample)
restore


*Table 2, Cols 1 and 2: "Property-year, n>3"
preserve
egen group=group(match_id Expropretario_ISTA year)
bys group: gen groupN=_N
collapse (iqr) hh_inc_pc_realC (mean)  groupN norm_dist Above500, by(match_id Expropretario_ISTA i_year*) cw
eststo T2c1:  reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 300 &  groupN>3, cluster(Expropretario_ISTA)
tab Above500 if e(sample)
eststo T2c2: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 150&  groupN>3 , cluster(Expropretario_ISTA)
tab Above500 if e(sample)
restore


*Table 2, Cols 3 and 4: "Property-year, n>2"
preserve
egen group=group(match_id Expropretario_ISTA year)
bys group: gen groupN=_N
collapse (iqr) hh_inc_pc_realC (mean)  groupN norm_dist Above500, by(match_id Expropretario_ISTA i_year*) cw
eststo T2c3: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 300 &  groupN>2, cluster(Expropretario_ISTA)
tab Above500 if e(sample)
eststo T2c4: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* if  abs(norm_dist) < 150&  groupN>2 , cluster(Expropretario_ISTA)
tab Above500 if e(sample)
restore


*Table 3, Cols 3 and 4: "Corrected data"
eststo T3c3: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year* sex  age age2 if abs(norm_dist) <  300 , cluster(Expropretario_ISTA)
	sum hh_income_pc if abs(norm_dist)<300
	tab Above500 if e(sample)
eststo T3c4: reg hh_inc_pc_realC Above500 norm_dist c.norm_dist#Above500 i_year*  sex  age age2 if abs(norm_dist) < 150 , cluster(Expropretario_ISTA)
	tab Above500 if e(sample)
	sum hh_income_pc if abs(norm_dist)<150
	

*Figure 1, right panel, "corrected figure"
preserve
gen Above500_QPlot = Above500
label var Above500_QPlot "Quantile Estimates for: Above 500 (ha)"
bsqreg ln_hh_inc_pc_realC   Above500_QPlot norm_dist norm_dist_Above age age2 i_year1-i_year8 i_year10-i_year11  if abs(norm_dist) < 150 & hh_income_pc >0,  q(.50) 
set scheme lean1
grqreg Above500_QPlot,  ci reps(40) qstep(.2) seed(821)   subtitle("Corrected figure") 
restore


*Table 1
label var Above500 "Above500"
noisily esttab T1c1 T1c2 T1c3 T1c4 T1c5 T1c6 T1c7 T1c8, ///
	keep(Above500) s(N ) label nomtitles starlevels(* 0.10 ** 0.05 *** 0.01) se t(%8.2g)

*Table 2
noisily esttab T2c1 T2c2 T2c3 T2c4, ///
	keep(Above500) s(N ) label nomtitles starlevels(* 0.10 ** 0.05 *** 0.01) se t(%8.2g)

*Table 3
noisily esttab T3c1 T3c2 T3c3 T3c4 , ///
	keep(Above500) s(N ) label nomtitles starlevels(* 0.10 ** 0.05 *** 0.01) se t(%8.2g)

